# 
#  model3.r
#  
# This file replicates results for the main model with Dirichlet process random effects 
# It: (1) prepares the data (centering, z-std.), (2) runs JAGS, 
# (3) calculates posterior summaries and quantities of interest
# (4) calculates differences in steady state estimates to normal RE model
# 
# Note: the corresponding JAGS model file is "model3.bug"
# 



rm(list=ls())

# Load Libs
library(foreign)
library(rjags)
library(coda)
# Load helper functions
source("functions.R")



# =========================================================================
# =                         Data                                 	  =
# =========================================================================

dat <- read.dta("data1samp.dta", convert.factors = FALSE)

## partental background dummies
dat$zg1 <- ifelse(dat$zgold==1, 1, 0)
dat$zg2 <- ifelse(dat$zgold==2, 1, 0)
dat$zg3 <- ifelse(dat$zgold==3, 1, 0)
dat$zg4 <- ifelse(dat$zgold==4, 1, 0)
## standardize continuous vars
dat$age <- zstd(dat$age, gelman=TRUE)
dat$nkids <- zstd(dat$nkids, gelman=TRUE)
dat$hhsize <- zstd(dat$hhsize, gelman=TRUE)
dat$ownocc <- zstd(dat$ownocc, gelman=TRUE)
dat$hsequi <- zstd(dat$hsequi, gelman=TRUE)
dat$incshr <- zstd(dat$incshr, gelman=TRUE)
dat$perminc <- zstd(dat$perminc, gelman=TRUE)
dat$transinc <- zstd(dat$transinc, gelman=TRUE)
## center dummy vars
dat$div <- center(dat$div)
dat$uempl <- center(dat$uempl)
dat$union <- center(dat$union)
dat$fem <- center(dat$fem)
dat$nonwhite <- center(dat$nonwhite)
dat$degree <- center(dat$degree)
dat$alvl <- center(dat$alvl)
dat$olvl <- center(dat$olvl)
dat$zg1 <- center(dat$zg1)
dat$zg2 <- center(dat$zg2)
dat$zg3 <- center(dat$zg3)
dat$zg4 <- center(dat$zg4)
dat$london <- center(dat$london)

## reshape data:
## time varying variables
y <- as.matrix(reshape(subset(dat, select=c(y,pid,waven)), 
	timevar="waven", idvar="pid", direction="wide")[,-1])
age <- as.matrix(reshape(subset(dat, select=c(age,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
nkids <- as.matrix(reshape(subset(dat, select=c(nkids,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hhsize <- as.matrix(reshape(subset(dat, select=c(hhsize,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
ownocc <- as.matrix(reshape(subset(dat, select=c(ownocc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
hsequi <- as.matrix(reshape(subset(dat, select=c(hsequi,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
incshr <- as.matrix(reshape(subset(dat, select=c(incshr,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
perminc <- as.matrix(reshape(subset(dat, select=c(perminc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
transinc <- as.matrix(reshape(subset(dat, select=c(transinc,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
div <- as.matrix(reshape(subset(dat, select=c(div,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
uempl <- as.matrix(reshape(subset(dat, select=c(uempl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])
union <- as.matrix(reshape(subset(dat, select=c(union,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,-1])

# Time constant variables
fem <- reshape(subset(dat, select=c(female,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
nw <- reshape(subset(dat, select=c(nonwhite,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
degree <- reshape(subset(dat, select=c(degree,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
alvl <- reshape(subset(dat, select=c(alvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
olvl <- reshape(subset(dat, select=c(olvl,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg1 <- reshape(subset(dat, select=c(zg1,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg2 <- reshape(subset(dat, select=c(zg2,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg3 <- reshape(subset(dat, select=c(zg3,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
zg4 <- reshape(subset(dat, select=c(zg4,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]
lond <- reshape(subset(dat, select=c(london,pid,waven)), 
		timevar="waven", idvar="pid", direction="wide")[,2]


N <- dim(y)[1]

M <- 20

jags.data <- gen.list(c("y", "age", "nkids", "hhsize", "ownocc", "hsequi", "incshr", "perminc", "transinc", "div", "uempl", "fem", "nw", "degree", "alvl", "olvl", "zg1", "zg2", "zg3", "zg4", "union", "lond", "N", "M"))


# ====================================================================
# =                       MCMC samples                               =
# ====================================================================

load.module("glm")

# cutpoint initial values
z <- jags.data$y
z[z==0] <- -0.5
z[z==1] <- 0.5
z[z==2] <- 1.5
temp1 <- subset(dat, dat$waven==1)
temp2 <- subset(dat, dat$waven>1)
coefb <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union, data=temp2))
coefd <- coef(MASS::polr(as.factor(y)~age+nkids+hhsize+ownocc+hsequi+incshr+perminc+transinc+div+
	uempl+female+nonwhite+degree+alvl+olvl+union+zg1+zg2+zg3+zg4+london,data=temp1))

init <- list(
	list(z=z, gamma=0.229, scale=1.341, beta=coefb, delta=coefd, cut.delta=0.825, beta0=0.411,
		 "RNG.seed"=12345),
	list(z=z, gamma=0.229, scale=1.341, beta=coefb, delta=coefd, cut.delta=0.825, beta0=0.411,
		"RNG.seed"=54321))

m3 <- jags.model(file="model3.bug", data=jags.data, n.chains=2, n.adapt=2e3, init=init)
update(m3, 2e4)
mon <- c("beta0", "beta", "gamma", "delta", "scale","cut[2]", "K", "ppp.obs", "ppp.pred", "TMatch", "ARerr", "alpha")
m3.out <- coda.samples(m3, variable.names=mon, n.iter=2e4*25, thin=25)

save(m3.out, file="model3_samples.Rdata")




# ====================================================================
# =                       Analyses                                   =
# ====================================================================

## Convergence diagnostics

## Gelman-Rubin diag (excludes post. pred. check values)
gelman.diag(mcmc.list(m3.out[[1]][,c(4:44,51)], m3.out[[2]][,c(4:44,51)]))
## combine samples from 2 chains
m3.out <- runjags::combine.mcmc(list(m3.out[[1]],m3.out[[2]]))
## Geweke diagnostics
geweke.diag(m3.out)


# ==============================================
# =     Posterior summary   
# ==============================================

sum.coda(m3.out)




# ==============================================
# =         Long-term effects    
# ==============================================

# steady state effects (y* metric):

# sample 5000 values from posterior
m3.coef.samp <- apply(m3.out[,6:21], 2, sample, 5000)
m3.gamma.samp <- sample(m3.out[,"gamma"], 5000)
m3.steady.state <- m3.coef.samp / (1-m3.gamma.samp)
sum.coda(m3.steady.state)


# steady state effects (P(y=3))

# sample of 5000 values from posterior
m3.coef.samp2 <- apply(m3.out, 2, sample, 5000)

# Predicted probabilities
# Function inputs:
# sims: matrix of simulated coef values
# xvals vector of beta's X-values
# x-values order in model:
# age nkids hhsi own equi incshr perm trans div uempl fem nw
# degree alvl olvl union
# remember: all inputs have mean 0, continous var have sd=0.5

predict <- function(sims, xvals) {
	alpha <- sims[,"beta0"]
	gamma <- sims[,"gamma"]
	cut <- sims[,"cut[2]"]
	beta.mat <- sims[,6:21]
	beta.mat.scaled <- beta.mat/(1-gamma)
	linpred <- alpha + beta.mat.scaled%*%xvals
	p1 <- pnorm(0 - linpred)
	p2 <- pnorm(cut - linpred) - (pnorm(0 - linpred))
	p3 <- 1 - pnorm(cut - linpred)
	return(cbind(p1,p2,p3))
}

# calculate 1 sd change for continuous vars
# output: mean,sd,low,high hpd for cat.1 and cat.3
diffs <- function(values, sims) {
	out <- matrix(NA, ncol=8, nrow=length(values))
	j <- 1
	for (i in values){
		xvals  <- rep(0,16)
		xvals[i] <- -0.5
		p0 <- predict(sims, xvals)
		xvals[i] <- 0.5
		p1 <- predict(sims, xvals)
		out[j,1:4] <- sum.coda(as.matrix(p1[,1]-p0[,1]))
		out[j,5:8] <- sum.coda(as.matrix(p1[,3]-p0[,3]))
		j <- j+1
	}
	return(out)
}
# 

# Calculation for continous variables
round(diffs(c(1,2,3,5,6,7,8), m3.coef.samp2),3)


# Calculations for discrete vars

# Union
xvals  <- rep(0,16)
xvals[16] <- -0.231313578062804
p0 <- predict(m3.coef.samp2, xvals)
xvals[16] <- 0.768686421937196
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Female
xvals  <- rep(0,16)
xvals[11] <- -0.514705882352941
p0 <- predict(m3.coef.samp2, xvals)
xvals[11] <- 0.485294117647059
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Nonwhite
xvals  <- rep(0,16)
xvals[12] <- -0.0340557275541796
p0 <- predict(m3.coef.samp2, xvals)
xvals[12] <- 0.96594427244582
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Degree
xvals  <- rep(0,16)
xvals[13] <- -0.196594427244582
p0 <- predict(m3.coef.samp2, xvals)
xvals[13] <- 0.803405572755418
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Alevel
xvals  <- rep(0,16)
xvals[14] <- -0.190402476780186
p0 <- predict(m3.coef.samp2, xvals)
xvals[14] <- 0.809597523219814
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Olevel
xvals  <- rep(0,16)
xvals[15] <- -0.419504643962848
p0 <- predict(m3.coef.samp2, xvals)
xvals[15] <- 0.580495356037152
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# H owner
xvals  <- rep(0,16)
xvals[4] <- -1.03954957105278
p0 <- predict(m3.coef.samp2, xvals)
xvals[4] <- 0.240462181241481
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Divorced
xvals  <- rep(0,16)
xvals[9] <- -0.0568332596196373 
p0 <- predict(m3.coef.samp2, xvals)
xvals[9] <- 0.943166740380363
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))

# Unempl
xvals  <- rep(0,16)
xvals[10] <- -0.0339451570101725
p0 <- predict(m3.coef.samp2, xvals)
xvals[10] <- 0.966054842989827
p1 <- predict(m3.coef.samp2, xvals)
sum.coda(as.matrix(p1[,1]-p0[,1]))
sum.coda(as.matrix(p1[,3]-p0[,3]))





# ==========================================================
# =  Difference in steady state estimates DP to normal RE: =
# ==========================================================

# Load N() RE results

load("model1_samples.Rdata")
m1_nRE <- runjags::combine.mcmc(list(m1.out[[1]],m1.out[[2]]))


# predict_nRE function calculates predictions
# diffs_NDP calculates diff between N() RE and DP() RE

predict_nRE <- function(sims, xvals) {
	alpha <- sims[,"alpha"]
	gamma <- sims[,"gamma"]
	cut <- sims[,"cut[2]"]
	beta.mat <- sims[,5:20]
	beta.mat.scaled <- beta.mat/(1-gamma)
	linpred <- alpha + beta.mat.scaled%*%xvals
	p1 <- pnorm(0 - linpred)
	p2 <- pnorm(cut - linpred) - (pnorm(0 - linpred))
	p3 <- 1 - pnorm(cut - linpred)
	return(cbind(p1,p2,p3))
}

diffs_NDP <- function(values, sims.N, sims.DP) {
	out <- matrix(NA, ncol=8, nrow=length(values))
	j <- 1
	for (i in values){
		xvals  <- rep(0,16)
		xvals[i] <- -0.5
		p0.dp <- predict(sims.DP, xvals)
		p0.n <- predict_nRE(sims.N, xvals)
		xvals[i] <- 0.5
		p1.dp <- predict(sims.DP, xvals)
		p1.n <- predict_nRE(sims.N, xvals)
		out[j,1:4] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-
			(p1.dp[,1]-p0.dp[,1])))
		out[j,5:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-
			(p1.dp[,3]-p0.dp[,3])))
		j <- j+1
	}
	return(out)
}

# set up empty matrix
predictions.diff <- matrix(NA, ncol=8, nrow=16)

# calculate differences in predictions for continuous covariates
predictions.diff[1:7,1:8] <- round(diffs_NDP(c(1,2,3,5,6,7,8), m1_nRE, m1_dpRE1), 3)

# calculations for discrete covariates

# Union
xvals  <- rep(0,16)
xvals[16] <- -0.231313578062804
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[16] <- 0.768686421937196
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[8,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[8,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Female
xvals  <- rep(0,16)
xvals[11] <- -0.514705882352941
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[11] <- 0.485294117647059
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[9,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[9,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Nonwhite
xvals  <- rep(0,16)
xvals[12] <- -0.0340557275541796
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[12] <- 0.96594427244582
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[10,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[10,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Degree
xvals  <- rep(0,16)
xvals[13] <- -0.196594427244582
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[13] <- 0.803405572755418
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[11,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[11,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Alevel
xvals  <- rep(0,16)
xvals[14] <- -0.190402476780186
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[14] <- 0.809597523219814
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[12,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[12,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Olevel
xvals  <- rep(0,16)
xvals[15] <- -0.419504643962848
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[15] <- 0.580495356037152
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[13,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[13,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# H owner
xvals  <- rep(0,16)
xvals[4] <- -1.03954957105278
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[4] <- 0.240462181241481
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[14,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[14,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Divorced
xvals  <- rep(0,16)
xvals[9] <- -0.0568332596196373 
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[9] <- 0.943166740380363
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[15,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[15,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))

# Unempl
xvals  <- rep(0,16)
xvals[10] <- -0.0339451570101725
p0.dp <- predict(m1_dpRE1, xvals)
p0.n <- predict_nRE(m1_nRE, xvals)
xvals[10] <- 0.966054842989827
p1.dp <- predict(m1_dpRE1, xvals)
p1.n <- predict_nRE(m1_nRE, xvals)
predictions.diff[16,1:8] <- sum.coda(as.matrix((p1.n[,1]-p0.n[,1])-(p1.dp[,1]-p0.dp[,1])))
predictions.diff[16,1:8] <- sum.coda(as.matrix((p1.n[,3]-p0.n[,3])-(p1.dp[,3]-p0.dp[,3])))



# Calculate differences in z-metric

# z-metric steady state est. from N() RE model
m1.coef.samp <- apply(m1_nRE[,5:20], 2, sample, 5000)
m1.gamma.samp <- sample(m1_nRE[,"gamma"], 5000)
m1.steady.state <- m1.coef.samp / (1-m1.gamma.samp)
# Difference
sum.coda(m3.steady.state - m1.steady.state)






